📊 Plot untuk Dashboard

# import library
library(dplyr)
library(ggplot2)
library(scales)
library(glue)
library(plotly)
library(lubridate)
options(scipen = 100)

# Read the raw trending-video data. Text columns come in as factors and
# strings are marked as latin1-encoded.
vids <- read.csv(
  file = "data_input/youtubetrends_2023.csv",
  encoding = "latin1",
  stringsAsFactors = TRUE
)

# Build the cleaned table used by every plot below:
#   * drop the three flag columns that are not needed,
#   * parse the date / date-time columns with lubridate,
#   * turn the free-text columns back into plain character vectors,
#   * add per-view engagement ratios.
vids_clean <- vids %>%
  select(
    -comments_disabled,
    -ratings_disabled,
    -video_error_or_removed
  ) %>%
  mutate(
    # type conversions
    trending_date = ymd(trending_date),
    publish_time = ymd_hms(publish_time),
    across(c(title, channel_title), as.character),
    # new columns: likes, dislikes and comments per view
    # NOTE(review): a row with views == 0 would produce NaN/Inf here —
    # confirm the input never contains such rows.
    likesp = likes / views,
    dislikep = dislikes / views,
    commentp = comment_count / views
  )

Plot 1: Trending Categories of YouTube US 2023

# Data wrangling
# Count the rows of vids_clean in each category, order the categories from
# the highest to the lowest count, and attach the two-line tooltip text that
# the interactive plot displays on hover.
vids_count <- vids_clean %>%
  group_by(category_id) %>%
  summarise(count = n(), .groups = "drop") %>%
  arrange(desc(count)) %>%
  mutate(
    label = glue(
      "Category: {category_id}
      Video count: {comma(count)}"
    )
  )

# Visualization
# Horizontal bar chart: one bar per category, ordered by video count and
# filled on a red-to-black gradient driven by that same count.
plot1 <- vids_count %>%
  ggplot(
    mapping = aes(
      x = count,
      y = reorder(category_id, count), # reorder(A, by B)
      text = label                     # tooltip text consumed by ggplotly()
    )
  ) +
  geom_col(mapping = aes(fill = count)) +
  scale_x_continuous(labels = comma) +
  scale_fill_gradient(low = "red", high = "black") +
  labs(
    title = "Trending Categories of YouTube US 2023",
    x = "Video Count",
    y = NULL
  ) +
  theme_minimal() +
  # the fill legend is hidden
  theme(legend.position = "none")

# Convert to an interactive widget whose hover box shows only `label`.
ggplotly(plot1, tooltip = "text")

Plot 2: Top 10 Channel on Gaming

# Data wrangling
# Within the "Gaming" category, compute the mean of `views` per channel, keep
# the ten channels with the highest mean, and add the hover text for each.
vids_10 <- vids_clean %>%
  filter(category_id == "Gaming") %>%
  group_by(channel_title) %>%
  summarise(mean_viewers = mean(views), .groups = "drop") %>%
  arrange(desc(mean_viewers)) %>%
  slice_head(n = 10) %>%
  mutate(
    label = glue(
      "Channel : {channel_title}
       Average Views: {comma(mean_viewers)}"
    )
  )

# Visualization
# Lollipop chart: a red segment from 0 to each channel's average views with a
# black dot at the end; channels are ordered by that average and the axes are
# flipped so the channel names run along the vertical axis.
plot2 <- vids_10 %>%
  ggplot(
    mapping = aes(
      x = reorder(channel_title, mean_viewers),
      y = mean_viewers,
      text = label # tooltip text consumed by ggplotly()
    )
  ) +
  geom_segment(
    mapping = aes(
      xend = reorder(channel_title, mean_viewers),
      y = 0,
      yend = mean_viewers
    ),
    color = "red"
  ) +
  geom_point(color = "black", size = 3) +
  scale_y_continuous(labels = comma) +
  coord_flip() +
  labs(
    title = "Top 10 Channel on Gaming",
    x = NULL,
    y = "Average View"
  ) +
  theme_minimal()

# Convert to an interactive widget whose hover box shows only `label`.
ggplotly(plot2, tooltip = "text")

Plot 3: Viewers Activity of Gaming Videos

# Data wrangling
# Mean of `views` for "Gaming" videos at each value of `publish_hour`, plus
# the hover text for each point.
# NOTE(review): `publish_hour` is not derived in the cleaning step of this
# file, so it presumably arrives as a column of the input CSV — confirm.
vids_trend <- vids_clean %>%
  filter(category_id == "Gaming") %>%
  group_by(publish_hour) %>%
  summarise(avg_views = mean(views), .groups = "drop") %>%
  mutate(
    label2 = glue(
      "Publish Hour: {publish_hour}
      Average views: {comma(round(avg_views, 2))}"
    )
  )

# Visualization
# Red line of average views by publish hour with a black point at each hour.
plot3 <- vids_trend %>%
  ggplot(mapping = aes(x = publish_hour, y = avg_views)) +
  geom_line(colour = "red") +
  # `text` is mapped on the points only; ggplotly() reads it for the tooltip
  geom_point(mapping = aes(text = label2), colour = "black") +
  # one tick per hour value 0..23 on x, one tick per million up to 8,000,000 on y
  scale_x_continuous(breaks = seq(0, 23, by = 1)) +
  scale_y_continuous(
    labels = comma,
    breaks = seq(0, 8000000, by = 1000000)
  ) +
  labs(
    title = "Viewers Activity of Gaming Videos",
    x = "Publish Hours",
    y = "Average Views"
  ) +
  theme_minimal()

# Convert to an interactive widget whose hover box shows only `label2`.
ggplotly(plot3, tooltip = "text")